{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a9bc1c1a-53bc-4b86-9140-4f1af0128037",
   "metadata": {},
   "source": [
    "<table style=\"width:100%\">\n",
    "<tr>\n",
    "<td style=\"vertical-align:middle; text-align:left;\">\n",
    "<font size=\"2\">\n",
    "Supplementary code for the <a href=\"http://mng.bz/orYv\">Build a Large Language Model From Scratch</a> book by <a href=\"https://sebastianraschka.com\">Sebastian Raschka</a><br>\n",
    "<br>Code repository: <a href=\"https://github.com/rasbt/LLMs-from-scratch\">https://github.com/rasbt/LLMs-from-scratch</a>\n",
    "</font>\n",
    "</td>\n",
    "<td style=\"vertical-align:middle; text-align:left;\">\n",
    "<a href=\"http://mng.bz/orYv\"><img src=\"https://sebastianraschka.com/images/LLMs-from-scratch-images/cover-small.webp\" width=\"100px\"></a>\n",
    "</td>\n",
    "</tr>\n",
    "</table>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5250207d-f811-46df-9d16-4ac1e9ce1c66",
   "metadata": {},
   "source": [
    "# Score Correlation Analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "badc7ffb-d51c-4de0-97c5-b54cf3e28315",
   "metadata": {},
   "source": [
    "- This notebook analyses the correlation between the different evaluation method scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fa39424b-e058-4351-94ec-249b812ae8fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "with open(\"gpt4-model-1-response.json\", \"r\") as file:\n",
    "    gpt4_model_1 = json.load(file)\n",
    "\n",
    "with open(\"llama3-8b-model-1-response.json\", \"r\") as file:\n",
    "    llama3_8b_model_1 = json.load(file)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ef67d30-7602-4695-a190-16209a152621",
   "metadata": {},
   "source": [
    "## GPT-4 vs Llama 3 8B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2a0d4288-507f-414c-afde-9742935cd8bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA98ElEQVR4nO3de3gU9cH+/3tz5pBsGpAcJJAVUIigcjYG9VGhUDGtytWWFhSiP1ojqIhHVIxpC8Gnv8daq2BBiVZAqq1YYy3qA1QFA4SjYBTxMRxUklRjsuGQBLLz/SPNliUJbGD2NPt+XVeuNrOfTD6MMLmzM597bIZhGAIAALCoiEBPAAAAwJcIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNKiAj2BYOByufT1118rPj5eNpst0NMBAABeMAxDdXV1SktLU0RE++/fEHYkff3110pPTw/0NAAAwBk4cOCAevbs2e7rhB1J8fHxkpoPVkJCQoBnAwAAvOF0OpWenu7+Od4ewo7kvnSVkJBA2AEAIMSc7hYUblAGAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRoMyAADwiaONTZr3Vpn2fntEGd0666FrM9UpJtLv8wjoOzvvv/++cnJylJaWJpvNptdff93jdcMw9Oijjyo1NVWdOnXS6NGjtWfPHo8x1dXVmjRpkhISEpSYmKhbb71Vhw4d8uOfAgAAnGzan0o14NFVemnDfn2w5xu9tGG/Bjy6StP+VOr3uQQ07Bw+fFgXX3yxnnnmmTZf/+///m899dRTevbZZ7Vx40Z16dJFY8eOVX19vXvMpEmT9PHHH+vdd9/Vm2++qffff1+/+MUv/PVHAAAAJ5n2p1K9W1bV5mvvllX5PfDYDMMw/Pod22Gz2bRy5Updf/31kprf1UlLS9M999yje++9V5JUW1ur5ORkvfDCC5o4caI++eQTZWZmqrS0VMOGDZMkrVq1Stdee62+/PJLpaWlefW9nU6n7Ha7amtreRAoAABn4WhjkwY8uuq04z751bizvqTl7c/voL1Buby8XBUVFRo9erR7m91u18iRI1VSUiJJKikpUWJiojvoSNLo0aMVERGhjRs3trvvhoYGOZ1Ojw8AAHD25r1VZuo4MwRt2KmoqJAkJScne2xPTk52v1ZRUaEePXp4vB4VFaWkpCT3mLYUFhbKbre7P9LT002ePQAA4Wnvt0dMHWeGoA07vjR79mzV1ta6Pw4cOBDoKQEAYAkZ3TqbOs4MQbv0PCUlRZJUWVmp1NRU9/bKykpdcskl7jFVVZ43QB0/flzV1dXur29LbGysYmNjzZ804IUml6FN5dWqqqtXj/g4jXAkKTLCFuhpAUCHNB536aWSvdpXfUS9kzrrpqwMxURF6KFrM/XShv2n/fqHrs30wyybBW3YcTgcSklJ0erVq93hxul0auPGjcrLy5MkZWVlqaamRlu2bNHQoUMlSWvWrJHL5dLIkSMDNXWgXat2HVRBcZkO1v5nRWGqPU75OZkaNzD1FF8JAMGj8K0yLf6gXK4TljjNfesTTbvcodnXZmpMZo92V2NJ0pjMHn7t2wlo2Dl06JA+//xz9+fl5eXavn27kpKS1KtXL82cOVO/+c1v1K9fPzkcDs2ZM0dpaWnuFVsDBgzQuHHjNG3aND377LM6duyYZsyYoYkTJ3q9Egvwl1W7Dipv6VadvPyxorZeeUu3auHkIQQeAEGv8K0y/fH98lbbXYbc208VdLx53WwBvWdn8+bNGjx4sAYPHixJmjVrlgYPHqxHH31UknT//ffrjjvu0C9+8QsNHz5chw4d0qpVqxQXF+fex7Jly9S/f39dc801uvbaazVq1CgtWrQoIH8eoD1NLkMFxWWtgo4k97aC4jI1uYKiCQIA2tR43KXFH7QOOidqKwi1ZcNn35oxJa8ETc9OINGzA18r+b9v9bPFG0477uVplyqrTzc/zAgAOu75D77Qr//+iWn72zt//Fl9fcj37ABWUlVXf/pBHRgHAIGwr9p/y8XNRNgB/KBHfNzpB3VgHAAEQu8k/y0XNxNhB/CDEY4kpdrj1N4Cc5uaV2WNcCT5c1oA0CE3ZWXIrKaMnw9vvyLGbIQdwA8iI2zKz2nulDj5PNHyeX5OJn07AIJaTFSEpl3uOOWYzNR4r/YVFeW/vjvCDuAn4wamauHkIUqxe16qSrHHsewcQMiYfW2mfnmFo9U7PBE26ZdXODRhSE+v9uPPS2KsxhKrseBfNCgDsIL2GpT/5WzQ8Hn/e9qvL31otM5JOLt3d7z9+U3YEWEHAACzTFiwXlv215x23NBeifrr7dln9b1Yeg4AAPzu61rvKjS8HWcGwg4AADCNPdq7C0bejjMDYQcAAJjm8+oGU8eZgbADAABMc9xl7jgzEHYAAIBpor1MFt6OMwNhBwAAmObNGVeYOs4MhB0AAGCa6qONpo4zA2EHAACYpqrOuyXl3o4zA2EHAACYpkd83OkHdWCcGaL89p0AwM94NAfgfyMcSUq1x6mitl5tNenY1PxMwBGOJL/NibADwJJW7TqoguIyHTyhpTXVHqf8nEweugr4UGSETfk5mbpt6dY2Xzck5edk+vUXDy5jAbCcVbsOKm/pVo+gI0kVtfXKW7pVq3YdDNDMgPCw4J+fn9XrZiPsALCUJpehguKyNt8+b9lWUFymJlfYPwMZ8IlD9cf10ZfOU4756EunDtUf99OMCDsALGZTeXWrd3ROZEg6WFuvTeXV/psUEEbu/vM2U8eZgbADwFKCcdkrEE72f3fU1HFmIOwAsJRgXPYKhJNe3+tk6jgzEHYAWErLstf21nnY1Lwqy5/LXoFwYBiGPvy/b9Tk5QM+f/fTwb6d0AlYeg7AUlqWveYt3Sqb5HGjcksA8veyV8DK6o816Y3tX2vJ+nJ9WlHn1ddc1DNBXeP8F0EIOwAsZ9zAVC2cPKRVz04KPTuAaSqd9XqpZJ+Wb9qv6sPNz7nqFB2pG4ecq9e3faXDjU3tfu3eb474a5qSCDsALGrcwFSNyUyhQRkw2fYDNSpaX66/f3RQx/9d4XBuYifdnNVbE4f3UuNxl5Zt3H/KfTjrj+tfzgadkxDrjykTdgBYV2SETVl9ugV6GkDIO9bk0qpdFSpaX66t+2vc24dnfE+52Q59PzNZUZHNtwGPmr/aq33esGCd1j14jS+m2wphBwAAtOm7w416uXS/XirZ574kHB1pU85FacrNdmhQT3urr6k+fMyrfXs7zgyEHQAA4OGzyjoVrS/Xym1fqf5Y8/Kq7l1jNGlkb026tNcpqxuSukTrSE379+ucOM5fCDsAAEAul6G1u6tUtH6v1n3+jXv7hWkJys12KOfiVMVGRZ52PytvH6Xh8/7Xq3H+QtgBACCMHWo4rr9sPqAXS/ap/JvDkqQIm/T9zBTlZmdohCNJNpv3N/afkxCrhLgoOU/x7KuEuCi/3ZwsEXYAAAhL+789ohdL9uqV0gOqa2gOJvFxUZo4PF03Z2UoPanzGe/7o8fG6qLH3m4z8CTERemjx8ae8b7PBGHHR5pcBkteAQBBxTAMbfiiWkvWl+t/P6mU8e/WzfO6d9HU7AxNGNJTXWLNiQYfPTZW/3I26IYF61R9+JiSukRr5e2j/PqOTgvCjg+s2nWwVZlZKmVmAIAAaa/l+Irzz1Fudoau7HeOInzwC/k5CbF+W15+KoQdk63adVB5S7d6VNRLUkVtvfKWbtXCyUMIPAAAv6h01mvphn1avnG/vj2p5Tg3O0N9e8QHeIb+QdgxUZPLUEFxWaugIzU/n8cmqaC4TGMyU7ikBQDwmR3/bjl+84SW4zR7nG6+LEMTh6crsXNMgGfoX4QdE20qr/a4dHUyQ9LB2nptKq+m1RUAYKpjTS69/XGFlqw7fctxuCHsmKiqrv2gcybjAAA4nTNpOQ43hB0TnapR8kzGAQDQnuaW471aue1Lj5bjn4/srcmnaTkON4QdE41wJCnVHqeK2vo279uxSUqxNy9DhzlY4g8gnLhchv75WZWWrPNsOc5MTdAtoxy67qJUxUWfvuXYDN6cfxuPu/RSyV7tqz6i3kmddVNWhmKi/H8pjbBjosgIm/JzMnXb0q1tvm5Iys/J5IexSVjiDyBcmN1yfLa8Of8WvlWmxR+Uy3XCb/9z3/pE0y53aPa1mX6bq0TYQYhiiT+AcODLluMz5c35d9v+7/TH98tbfa3LkHu7PwOPzTCMtq64hBWn0ym73a7a2lolJCSc8X6aXIZGPb6m3RVZLZex1j1wNe/unAWOMwArO1XLcW52hm40seW4o7w9/7Z3O0eLCJv06a9/cNaXtLz9+c07OyZi6bl/cJwBWFF7LceX9+uuW0Y5fNZy3BHenn9Px2VIL5Xs1a2Xn2fi7NpH2DERS8/9g+MMwEpCqeXYzPPqvuojpu3rdAg7JmLpuX9wnAFYQSi2HJt5Xu3tx/uNCDsmYum5f3CcAYSq9lqOh/X+nm4ZFfwtx96ef725Z+emrAzfTLKt7+e37xQGWpaeS83/wU/U8jlLz88exxlAqPnucKMW/PNzXfHfazVj+TZt3V+j6Eibbhx8ropnjNJf8i7TtYNSgzroSN6ff39xheOU+5l2ucOvfTusxpJ5q7Fa0P/iHxxnAMHulC3HI3upR0JoXm4/056dCJtM7dnx9uc3YUfmhx2JZl9/4TgDCDbB1HLsS8HQoMzSc4SFyAgby8sBBIVgazn2NW/OvzFREX5bXn4qhB0f4PIKAISPYGw5hifCjsl4jAEAWN+pWo6nZmdoQgBbjtEa/yVM1OQyVFBc1uZyO0PNd6oXFJdpTGYK95UAQAgKhZZjtEbYMRGPMQAAa2ppOV62cb+qg7zlGK0RdkzEYwwAwFpCseUYrRF2TMRjDAAg9J2q5Tg326GxFwZ3yzFaI+yYiMcYAMGFHiZ0xHeHG/Vy6X69VLLPfUtCdKRNORelKTfboUE97afdh697ZULNv5wNumHBOlUfPqakLtFaefsonZMQ6/d5BHWpYFNTkx577DEtXbpUFRUVSktL09SpU/XII4+4uwoMw1B+fr4WL16smpoaZWdna+HCherXr5/X38fMUsGW1ViSPAJPy+mV1ViAf1ABAW+Z1XLsj8bgUHLRY2/LWX+81faEuCh99NhYU76HJRqU582bpyeeeEIvvviiLrzwQm3evFm5ubmaO3eu7rzzTknS448/rsLCQr344otyOByaM2eOdu7cqbKyMsXFefcXlMdFANbSXgUEv3SghctlaO3uKhWtN6fluPCtMv3x/fJ2X//lFeEVeNoLOi3MCjyWCDvXXXedkpOT9fzzz7u3TZgwQZ06ddLSpUtlGIbS0tJ0zz336N5775Uk1dbWKjk5WS+88IImTpzo1ffhcRGAdTS5DI16fE27KyNbLieve+Bq/k2GIV+0HDced6n/nH94vKNzsgib9OmvfxAWl7T+5WzQ8Hn/e9pxpQ+NPutLWpZ4XMRll12mRYsW6bPPPtP555+vHTt2aN26dXriiSckSeXl5aqoqNDo0aPdX2O32zVy5EiVlJS0G3YaGhrU0NDg/tzpdJo+dx5jAAQGFRBoiy9bjl8q2XvKoCNJLqN5XDA8OsHXbliwzutx6x68xsezaRbUYefBBx+U0+lU//79FRkZqaamJs2dO1eTJk2SJFVUVEiSkpOTPb4uOTnZ/VpbCgsLVVBQ4LuJAwgYKiDQwl8tx/uqj5g6LtRVHz5m6jgzBHXYeeWVV7Rs2TItX75cF154obZv366ZM2cqLS1NU6ZMOeP9zp49W7NmzXJ/7nQ6lZ6ebsaUAQQYFRDwd8txby/fFfJ2XKhL6hKtIzVNXo3zl6AOO/fdd58efPBB9+WoQYMGad++fSosLNSUKVOUkpIiSaqsrFRq6n9uNqysrNQll1zS7n5jY2MVG+v/pW8AfI8KiPAVqJbjm7IyNPetT057z85NWRk++f7BZuXto7y6Z2fl7aP8MJtmQR12jhw5oogIz5u5IiMj5XI1Lw10OBxKSUnR6tWr3eHG6XRq48aNysvL8/d0AQSByAib8nMylbd0q2xquwIiPyeTm5MtJNAtxzFREZp2ueOUq7GmXe4Ii5uTJemchFglxEWddjWWP/t2gjrs5OTkaO7cuerVq5cuvPBCbdu2TU888YRuueUWSZLNZtPMmTP1m9/8Rv369XMvPU9LS9P1118f2MkDCJhxA1O1cPKQVhUQKVRAWEawtRy3LCunZ6fZR4+N9UvPjreCeul5XV2d5syZo5UrV6qqqkppaWn62c9+pkcffVQxMc1JvaVUcNGiRaqpqdGoUaO0YMECnX/++V5/H18sPQcQeFRAWI8ZLce+RIOyJ183KFuiZ8dfCDsAENzMajmGtViiZwcAEL7MbjlG+CLsAACCii9ajhHeCDsAgKDgy5ZjhDfCDgAgYPzVcozwxt8gAIDf+bvlGOGNsAMA8JtAtRwjvBF2AAA+F+iWY4Q3wg4AwCeCreUY4YuwAwAwVbC3HCP8EHYAAKag5RjBirADADhjtBwjFBB2AAAdRssxQglhBwDgNVqOEYoIOwCAU6LlGKGOv50AgDbRcgyrIOwAADzQcgyrIewAACTRcgzrIuwAQBij5RjhgLADAGGIlmOEE8IOAIQRWo4Rjgg7AGBxtBwj3BF2AMCiaDkGmhF2AMBiaDkGPBF2AMACaDkG2sfffAAIYbQcA6dH2AGAEETLMeA9wo6PNLkMbSqvVlVdvXrEx2mEI0mR/HYF4CzRcgx0HGHHB1btOqiC4jJ3UZckpdrjlJ+TqXEDUwM4MwChiJZj4OwQdky2atdB5S3dKuOk7RW19cpbulULJw8h8ADwCi3HgDkIOyZqchkqKC5rFXQkyZBkk1RQXKYxmSlc0gLQLlqOAXMRdky0qbza49LVyQxJB2vrtam8Wll9uvlvYgBM5Yt78mg5hhUFy/2rhB0TVdW1H3TOZByA4GP2PXm0HMOqgun+VcKOiXrEe/fWsrfjAAQXM+/Jo+UYVhZs968Sdkw0wpGkVHucKmrr27xvxyYpxd78Nh6A0GLGPXm0HCMcBOP9q/yrMlFkhE35OZnKW7pVNsnjP3TLf878nExuTgZC0Nnck0fLMcJJMN6/Stgx2biBqVo4eUir65Qp9OwAIe1M7smj5RjhKBjvXyXs+MC4gakak5kSFHegAzBHR+7Jo+UY4SwY718l7PhIZISN5eWAhZzunjxJSuwUrf9++1Nto+UYYSwY71/lXx4AeKHlnjzpP/fgnazm6DFt21+j6Eibbhx8ropnjNJf8i7T+ItSCToIG6f6txKo+1dthmG090tK2HA6nbLb7aqtrVVCQkKgpwMgiLXVHdKClmPgP/zRs+Ptz28uYwGAl1wuQ9GRETrvnC4eJ/DM1ATlZmco5+I0Wo6Bfwum+1cJOwBwGrQcA2cmWO5fJewAQDtoOQasgbADACeg5RiwHv7FAoD+3XK842stWUfLMWA1hB0AYa2l5Xj5xv36lpZjwJIIOwDCEi3HQPgg7AAIG8eaXHr74wotWVeurSe0HA/PaG45/n4mLceAFRF2AFjed4cb9XLpfr1Uss/djxMdaVPORWnKzXZoUE97gGcIwJcIOwAs67PKOhWt36uV275U/TGXpBNaji/t5dcHEQIIHMIOAEtxuQz987MqFa3fqw/2fOPefmFagnKzHcq5OFWxUbQcA+GEsAPAEg41HNdft3ypFz7cS8sxAA+EHQAh7UD1Eb3wIS3HANpH2AEQclpajov+3XLsOqHlODc7QzfScgzgBJwNAISMlpbjovV79clBp3v7Feefo9zsDFqOAbSJsAMg6FU56/USLccAzhBhB0DQamk5/vvOgzrW1Hyt6tzETro5q7cmDu8le+foAM8QQCjoUNhZu3attm7dqksvvVTZ2dn64x//qLlz5+ro0aO6/vrr9dRTT6lTp06+miuAMHC8yaVVH1eoaP1ebdn3nXs7LccAzpTXYWfx4sXKy8uTw+HQww8/rPz8fM2dO1c33XSTIiIitHTpUnXr1k3z58/35XwBD00uQ5vKq1VVV68e8XEa4UhSJPdshCRajoHA8dW5NFjO0TbDMAxvBg4cOFC//OUvdccdd2jVqlXKycnRc889pylTpkiSXn31Vc2ePVuff/65qRP86quv9MADD+gf//iHjhw5or59+6qoqEjDhg2T1LwqIz8/X4sXL1ZNTY2ys7O1cOFC9evXz+vv4XQ6ZbfbVVtbq4SEBFPnD99ZteugCorL3D8YJSnVHqf8nEyNG5gawJmhI2g5BgLLV+dSf5yjvf357XXY6dy5sz755BP17t1bkhQTE6MdO3ZowIABkqT9+/erX79+amhoMGH6zb777jsNHjxYV111lfLy8nTOOedoz5496tOnj/r06SNJevzxx1VYWKgXX3xRDodDc+bM0c6dO1VWVqa4OO9OkoSd0LNq10HlLd2qk//ytvy+sHDyEAJPEKPlGAgOvjqX+usc7e3Pb68vY9XX13vcjxMbG6vY2FiPz48fP36G023b448/rvT0dBUVFbm3ORwO9/83DENPPvmkHnnkEf3oRz+SJP3pT39ScnKyXn/9dU2cONHU+SA4NLkMFRSXtfpHJEmGmv8xFRSXaUxmCpe0ggwtx0Dw8NW5NBjP0V7f5Wez2VRXVyen06na2lrZbDYdOnRITqfT/WG2N954Q8OGDdOPf/xj9ejRQ4MHD9bixYvdr5eXl6uiokKjR492b7Pb7Ro5cqRKSkra3W9DQ4PHvH0xd/jOpvJqj7dFT2ZIOlhbr03l1f6bFE7pQPUR/frNMmXNW638Nz5W+TeHFR8XpWmXO/TefVfp2ZuGauR53Qg6gB/56lwajOdor9/ZMQxD559/vsfngwcP9vjc7BPVF198oYULF2rWrFl66KGHVFpaqjvvvFMxMTGaMmWKKioqJEnJyckeX5ecnOx+rS2FhYUqKCgwda7wn6q69v8Rnck4+AYtx0Bw89W5NBjP0V6fadauXevLebTJ5XJp2LBhmjdvniRp8ODB2rVrl5599ln3jdFnYvbs2Zo1a5b7c6fTqfT09LOeL/zD2xtWubE1MGg5BkKDr86lwXiO9jrsXHnllb6cR5tSU1OVmZnpsW3AgAH661//KklKSUmRJFVWVio19T83OlVWVuqSSy5pd78n32+E0DLCkaRUe5wqauvbvCZsk5Rib17iCP+pctZr6YZ9WkbLMRASfHUuDcZzdIeauZqamvTFF1/I5WpeHtrQ0KBXXnlFK1asUGVlpemTy87O1u7duz22ffbZZ+4VYQ6HQykpKVq9erX7dafTqY0bNyorK8v0+SA4REbYlJ/THIJPfn+g5fP8nExuTvaTHQdqNHPFNmU/vkZPrflc3x5uVJo9Tg/+oL9KZl+tuTcMIugAQchX59JgPEd7vfT8o48+0tixY1VVVaXMzEy99dZbuvbaa1VeXi6bzabo6Gi9/fbbGj58uGmTKy0t1WWXXaaCggL95Cc/0aZNmzRt2jQtWrRIkyZNktS8Ymv+/PkeS88/+ugjlp6HAXp2AoeWY8A66Nk5wbhx4xQfH6/8/Hw999xzeueddzRw4EAtW7ZMNptNubm5qqio0LvvvmvKH6DFm2++qdmzZ2vPnj1yOByaNWuWpk2b5n69pVRw0aJFqqmp0ahRo7RgwQKPm6lPh7ATuoKlnTNc0HIMWFOoNiibHnaSkpK0fv16DRgwQEePHlV8fLw+/PBDjRgxQpL08ccf68orr9Q333xzmj0FH8IOcGp7Kuu0hJZjAEHG9FJBwzAUFdU8/OT/laTIyEj3vTwAQl97LceZqQm6ZRQtxwBCh9dhZ+jQoXr88cdVUFCg559/Xg6HQ08//bSWLFkiSfrDH/6ggQMH+myiAPyDlmMAVuN12CksLNQPfvADFRUVqVu3blq7dq1uvfVWpaamKiIiQt99952Ki4t9OVcAPnSg+ohe+HCvXik9oLqG5ke/xMdFaeLwdN2claH0pM4BniEAnBmv79mRpMOHD+vTTz/VBRdcoK5du6q+vl7Lli3T0aNHNWbMGF1wwQW+nKvPcM8OwhUtxwBCmek3KFsZYQfhhpZjAFZg+g3KAEIfLccAwhFhBwgDOw7UqGh9uf6+86CONTW/mZtmj9PNl2Vo4vB0JXaOCfAMAcB3CDuARdFyDADNCDuAxdByDACezjrsGIYhl8ulyEjKxYBAOmXL8che6pFAyzGA8OT1e9jHjx/XI488oiuvvFL5+fmSpN/+9rfq2rWrOnfurClTpqixsdFnEwXQmstlaM2nlbrp+Y0a87v39fKm/ao/5lJmaoL+/x9frHUPXK1ZY84n6AAIa16/s1NQUKDnnntOkyZN0l/+8hdVVVXp73//uxYtWqSmpiY99NBDevLJJ3X//ff7cr4ARMsxAHSE12Fn+fLleu6553TdddcpLy9PF1xwgZYvX66f/vSnkqS4uDj9+te/JuwAPkTLMQB0nNdh5+uvv9bFF18sSerbt69iYmLcn0vS8OHDtW/fPvNnCIQ5wzC0sbxaS9bRcgwAZ8LrM6TdbldNTY3S09MlSUOGDFF8/H8KyBoaGnjbHDBRey3Hl/frrltGOWg5BgAveR12MjMztXXrVg0aNEiStH79eo/Xd+7cqX79+pk7OyAM0XIMAObyOuw8++yzio6Obvf1Y8eOcb8OcBZoOQYA3+BBoOJBoAic9lqOh/X+nm4ZRcsxAJyKXx4EWl9f36pbh7AAnF7NkUa9vOmA/lSyl5ZjAPCxDoedI0eO6P7779crr7yib7/9ttXrTU1NpkwMsCJajgHA/zocdu677z6tXbtWCxcu1E033aRnnnlGX331lf74xz9q/vz5vpgjENJcLkP//KxKRev36oM937i3Z6Ym6JZRDl13UarionncCgD4SofDTnFxsf70pz/pv/7rv5Sbm6vLL79cffv2Ve/evbVs2TJNmjTJF/MEQg4txwAQHDocdqqrq3XeeedJar4/p7q6WpI0atQo5eXlmTs7IATRcgwAwaXDYee8885TeXm5evXqpf79++uVV17RiBEjVFxcrMTERB9MEQh+hmFowxfVKlrfuuV4anaGJtByDAAB0+Gzb25urnbs2KErr7xSDz74oHJycvT000/r2LFjeuKJJ3wxRyBo0XIMAMHvrHt29u3bpy1btqhv37666KKLzJqXX9Gzg46qctbrpQ37tJyWYwAIGL/07EhS79691bt377PdDRASaDkGgNBzRmGntLRUa9euVVVVlVwul8drXMqC1Zyq5Tg326GxF9JyDADBrMNhZ968eXrkkUd0wQUXKDk52WPpLMtoYSXfHW7Uy6X79VLJPlqOASCEdTjs/P73v9eSJUs0depUH0wHCLzPKutUdFLLcbcuMZp0KS3HABCKOhx2IiIilJ2d7Yu5AAFzqpbj3OwM5VycRssxAISoDoedu+++W88884yefPJJH0wH8K/2Wo7HZCbrlmwHLccAYAEdDjv33nuvxo8frz59+igzM1PR0dEer7/22mumTQ7wFVqOASB8dDjs3HnnnVq7dq2uuuoqdevWjd96ETJoOQaA8NThM/uLL76ov/71rxo/frwv5gOYjpZjAAhvHQ47SUlJ6tOnjy/mApiqylmvpRv2aRktxwAQ1jocdh577DHl5+erqKhInTtzXwOCDy3HAIATdTjsPPXUU/q///s/JScnKyMjo9UNylu3bjVtcoC32ms5Hp7R3HL8/UxajgEgXHU47Fx//fU+mAZwZmqONOrlTQf0UslefU3LMQCgDWf91HMr4KnnoWdPZZ2KPtyr17b+p+W4e9cY/XwkLccAEC789tRzwF9O1XJ8yyiHrrsolZZjAEArHQ47TU1N+t3vfqdXXnlF+/fvV2Njo8fr1dXVpk0OkKTDDcf1lzZajr+fmaLc7AxajgEAp9ThsFNQUKDnnntO99xzjx555BE9/PDD2rt3r15//XU9+uijvpgjwtSB6iN68cO9+jMtxwCAs9Dhe3b69Omjp556SuPHj1d8fLy2b9/u3rZhwwYtX77cV3P1Ge7ZCR6GYWhjebWWrGvdcpybnaEbaTkGAPybz+7Zqaio0KBBgyRJXbt2VW1trSTpuuuu05w5c85wugh37bUcX3H+OcrNzqDlGABwxjocdnr27KmDBw+qV69e6tOnj9555x0NGTJEpaWlio2N9cUcYWG0HAMAfK3DYeeGG27Q6tWrNXLkSN1xxx2aPHmynn/+ee3fv1933323L+YIC/royxotWUfLMQDA9866Z6ekpEQlJSXq16+fcnJyzJqXX3HPjn/QcgwAMJPfenaysrKUlZV1truBhdFyDAAIJK/CzhtvvOH1Dn/4wx+e8WRgLXsq67Rk/V6t3NZGy/GlvdQjnpZjAIDveRV2vH0els1mU1NT09nMByGOlmMAQLDxKuy4XC5fzwMhjpZjAECwMq2d7csvv9SvfvUrLVq0yKxdIgTQcgwACHamPfV8x44dGjJkSEhexmI1VsfQcgwACAY89Ryma6/l+PJ+3XXLKActxwCAoETYwWnRcgwACGWEHbSLlmMAgBV4HXZuvPHGU75eU1NztnNBEGiv5XhY7+/pllG0HAMAQo/XYcduP3XLrd1u180333zWE7KKxuMuvVSyV/uqj6h3UmfdlJWhmKjgDQm0HLfW5DK0qbxaVXX16hEfpxGOJEVyTxKAU+C8EZxMW43lD/Pnz9fs2bN111136cknn5Qk1dfX65577tGKFSvU0NCgsWPHasGCBUpOTvZ6v2avxip8q0yLPyh3r1KSmjtnpl3u0OxrM896/2Zqq+W4W5cYTbq0tyaP7KUeCeHZcrxq10EVFJfp4L+DnySl2uOUn5OpcQNTAzgzAMGK84b/efvzO2TCTmlpqX7yk58oISFBV111lTvs5OXl6e9//7teeOEF2e12zZgxQxEREVq/fr3X+zYz7BS+VaY/vl/e7uu/vCLwgYeW41Nbteug8pZu1cn/MFp+N1s4eQgnLgAeOG8EhqWWnh86dEiTJk3S4sWL9Zvf/Ma9vba2Vs8//7yWL1+uq6++WpJUVFSkAQMGaMOGDbr00kv9Os/G4y4t/qD9oCNJiz8o1z3f7x+QS1q0HJ9ek8tQQXFZqxOWJBlqPnEVFJdpTGYKb00DkMR5IxQE700kJ5g+fbrGjx+v0aNHe2zfsmWLjh075rG9f//+6tWrl0pKStrdX0NDg5xOp8eHGV4q2etx6aotLqN5nD8dqD6i37xZpkvnrVb+Gx+r/JvDio+L0rTLHXrvvqv07E1DNfK8bmEfdCRpU3m1x1vQJzMkHayt16byav9NCkBQ47wR/IL+nZ0VK1Zo69atKi0tbfVaRUWFYmJilJiY6LE9OTlZFRUV7e6zsLBQBQUFZk9V+6qPmDrubJyq5XhqdoYm0HLcpqq69k9YZzIOgPVx3gh+Qf3T7sCBA7rrrrv07rvvKi7OvBtlZ8+erVmzZrk/dzqdSk9PP+v99vbyOVDejjsTtByfnR7x3v0983YcAOvjvBH8gjrsbNmyRVVVVRoyZIh7W1NTk95//309/fTTevvtt9XY2KiamhqPd3cqKyuVkpLS7n5jY2MVGxtr+nxvysrQ3Lc+OeWlrAhb8ziz0XJsjhGOJKXa41RRW9/m9XebpBR783JSAJA4b4SCoA4711xzjXbu3OmxLTc3V/3799cDDzyg9PR0RUdHa/Xq1ZowYYIkaffu3dq/f7+ysrL8Pt+YqAhNu9xxytVY0y53mHpzMi3H5oqMsCk/J1N5S7fKJnmcuFreD8vPyeQmQwBunDeCX1CHnfj4eA0cONBjW5cuXdStWzf39ltvvVWzZs1SUlKSEhISdMcddygrK8vvK7FatCwr92XPzqlajnOzHRp7IS3HZ2PcwFQtnDykVV9GCn0ZANrBeSO4BXXY8cbvfvc7RUREaMKECR6lgoE0+9pM3fP9/qY3KNNy7D/jBqZqTGYKTagAvMZ5I3iFTKmgL5ndoGy2tlqOu3eN0c9HhnfLMQAgvFmqVDAcnarlODc7QzkXp4V1yzEAAN4i7ASZQw3H9dc2Wo7HZCbrlmwHLccAAHQQYSdIHKg+ohc/3Ks/lx5QXcNxSVJ8XJQmDk/XzVkZSvdhNw8AAFZG2AkgWo4BAPA9fpIGAC3HAAD4D2HHj9pqOY6LjtCEIT1pOQYAwEcIO36w40CNitbTcgwAQCAQdnyElmMAAIIDYcdHjrsM5f/tY317uJGWYwAAAoiw4yNx0ZHK+68+ctYfp+UYAIAAIuz40P93+XmBngIAAGGPm0YAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClEXYAAIClRQV6AlbV5DK0qbxaVXX16hEfpxGOJEVG2AI9LQAAwg5hxwdW7TqoguIyHaytd29LtccpPydT4wamBnBmAACEHy5jmWzVroPKW7rVI+hIUkVtvfKWbtWqXQcDNDMAAMITYcdETS5DBcVlMtp4rWVbQXGZmlxtjQAAAL5A2DHRpvLqVu/onMiQdLC2XpvKq/03KQAAwhxhx0RVde0HnTMZBwAAzh5hx0Q94uNMHQcAAM4eYcdEIxxJSrXHqb0F5jY1r8oa4Ujy57QAAAhrhB0TRUbYlJ+TKUmtAk/L5/k5mfTtAADgR4Qdk40bmKqFk4coxe55qSrFHqeFk4fQswMAgJ9RKugD4wamakxmCg3KAAAEAd7ZAQAAlsY7Oz7A4yIAAAgevLNjMh4XAQBAcCHsmIjHRQAAEHwIOybicREAAAQfwo6JeFwEAADBh7Bjou5dYk0dBwAAzh6rsczkbY0OdTuAXzS5DJ/0XflqvwB8I6jDTmFhoV577TV9+umn6tSpky677DI9/vjjuuCCC9xj6uvrdc8992jFihVqaGjQ2LFjtWDBAiUnJ/t9vt8cajB1HIAz56sKCKolgNAT1Jex3nvvPU2fPl0bNmzQu+++q2PHjun73/++Dh8+7B5z9913q7i4WK+++qree+89ff3117rxxhsDMl+eeg4EB19VQFAtAYSmoH5nZ9WqVR6fv/DCC+rRo4e2bNmiK664QrW1tXr++ee1fPlyXX311ZKkoqIiDRgwQBs2bNCll17q1/mmeBlivB0HoONOVwFhU3MFxJjMlA5devLVfgH4XlC/s3Oy2tpaSVJSUpIkacuWLTp27JhGjx7tHtO/f3/16tVLJSUl7e6noaFBTqfT48MM4//wvqnjAHScryogqJYAQlfIhB2Xy6WZM2cqOztbAwcOlCRVVFQoJiZGiYmJHmOTk5NVUVHR7r4KCwtlt9vdH+np6abM8egxl6njAHScryogqJYAQlfIhJ3p06dr165dWrFixVnva/bs2aqtrXV/HDhwwIQZSp2ivTuc3o4D0HG+uneOe/KA0BUSP3VnzJihN998U2vXrlXPnj3d21NSUtTY2KiamhqP8ZWVlUpJSWl3f7GxsUpISPD4MMOqu640dRyAjhvhSFKqPa7dhgebmldPjXAkBcV+AfheUIcdwzA0Y8YMrVy5UmvWrJHD4fB4fejQoYqOjtbq1avd23bv3q39+/crKyvL39NVr+6dFXWaIxoV0TwOgG9ERtiUn5MpqXWlVcvn+TmZHb6J2Ff7BeB7NsMwgvaplLfffruWL1+uv/3tbx7dOna7XZ06dZIk5eXl6a233tILL7yghIQE3XHHHZKkDz/80Ovv43Q6ZbfbVVtba8q7PH0f+ruOt3FbTlSE9Pm88We9fwCnR88OYH3e/vwO6rBjs7X9G1JRUZGmTp0q6T+lgi+//LJHqeCpLmOdzOywI0n7vzmicb9/T0ePudQpOkKr7rqSd3QAP6NBGbA2S4Qdf/FF2AEAAL7l7c/voL5nBwAA4GwRdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKURdgAAgKVFBXoCCB5NLkObyqtVVVevHvFxGuFIUmSELdDTAgDgrBB2IElateugCorLdLC23r0t1R6n/JxMjRuYGsCZAQBwdriMBa3adVB5S7d6BB1JqqitV97SrVq162CAZgYAwNkj7IS5JpehguIyGW281rKtoLhMTa62RgAAEPwIO2FuU3l1q3d0TmRIOlhbr03l1f6bFAAAJiLshLmquvaDzpmMAwAg2BB2wlyP+DhTxwEAEGwIO2FuhCNJqfY4tbfA3KbmVVkjHEn+nBYAAKYh7IS5yAib8nMyJalV4Gn5PD8nk74dAEDIIuxA4wamauHkIUqxe16qSrHHaeHkIfTsAABCGqWCkNQceMZkptCgDACwHMIO3CIjbMrq0y3Q0wAAwFRcxgIAAJZG2AEAAJZG2AEAAJZmmbDzzDPPKCMjQ3FxcRo5cqQ2bdoU6CkBAIAgYImw8+c//1mzZs1Sfn6+tm7dqosvvlhjx45VVVVVoKcGAAACzBJh54knntC0adOUm5urzMxMPfvss+rcubOWLFkS6KkBAIAAC/mw09jYqC1btmj06NHubRERERo9erRKSkra/JqGhgY5nU6PDwAAYE0hH3a++eYbNTU1KTk52WN7cnKyKioq2vyawsJC2e1290d6ero/pgoAAAIg5MPOmZg9e7Zqa2vdHwcOHAj0lAAAgI+EfINy9+7dFRkZqcrKSo/tlZWVSklJafNrYmNjFRsb6/7cMAxJ4nIWAAAhpOXndsvP8faEfNiJiYnR0KFDtXr1al1//fWSJJfLpdWrV2vGjBle7aOurk6SuJwFAEAIqqurk91ub/f1kA87kjRr1ixNmTJFw4YN04gRI/Tkk0/q8OHDys3N9err09LSdODAAcXHx8tmM+/Bl06nU+np6Tpw4IASEhJM2y88cZz9h2PtHxxn/+A4+4cvj7NhGKqrq1NaWtopx1ki7Pz0pz/Vv/71Lz366KOqqKjQJZdcolWrVrW6abk9ERER6tmzp8/ml5CQwD8kP+A4+w/H2j84zv7BcfYPXx3nU72j08ISYUeSZsyY4fVlKwAAED7CcjUWAAAIH4QdH4qNjVV+fr7Hyi+Yj+PsPxxr/+A4+wfH2T+C4TjbjNOt1wIAAAhhvLMDAAAsjbADAAAsjbADAAAsjbADAAAsjbDjQ88884wyMjIUFxenkSNHatOmTYGeUkgrLCzU8OHDFR8frx49euj666/X7t27PcbU19dr+vTp6tatm7p27aoJEya0em4avDd//nzZbDbNnDnTvY1jbJ6vvvpKkydPVrdu3dSpUycNGjRImzdvdr9uGIYeffRRpaamqlOnTho9erT27NkTwBmHnqamJs2ZM0cOh0OdOnVSnz599Otf/9rjWUoc5457//33lZOTo7S0NNlsNr3++user3tzTKurqzVp0iQlJCQoMTFRt956qw4dOuSbCRvwiRUrVhgxMTHGkiVLjI8//tiYNm2akZiYaFRWVgZ6aiFr7NixRlFRkbFr1y5j+/btxrXXXmv06tXLOHTokHvMbbfdZqSnpxurV682Nm/ebFx66aXGZZddFsBZh65NmzYZGRkZxkUXXWTcdddd7u0cY3NUV1cbvXv3NqZOnWps3LjR+OKLL4y3337b+Pzzz91j5s+fb9jtduP11183duzYYfzwhz80HA6HcfTo0QDOPLTMnTvX6Natm/Hmm28a5eXlxquvvmp07drV+P3vf+8ew3HuuLfeest4+OGHjddee82QZKxcudLjdW+O6bhx44yLL77Y2LBhg/HBBx8Yffv2NX72s5/5ZL6EHR8ZMWKEMX36dPfnTU1NRlpamlFYWBjAWVlLVVWVIcl47733DMMwjJqaGiM6Otp49dVX3WM++eQTQ5JRUlISqGmGpLq6OqNfv37Gu+++a1x55ZXusMMxNs8DDzxgjBo1qt3XXS6XkZKSYvz2t791b6upqTFiY2ONl19+2R9TtITx48cbt9xyi8e2G2+80Zg0aZJhGBxnM5wcdrw5pmVlZYYko7S01D3mH//4h2Gz2YyvvvrK9DlyGcsHGhsbtWXLFo0ePdq9LSIiQqNHj1ZJSUkAZ2YttbW1kqSkpCRJ0pYtW3Ts2DGP496/f3/16tWL495B06dP1/jx4z2OpcQxNtMbb7yhYcOG6cc//rF69OihwYMHa/Hixe7Xy8vLVVFR4XGs7Xa7Ro4cybHugMsuu0yrV6/WZ599JknasWOH1q1bpx/84AeSOM6+4M0xLSkpUWJiooYNG+YeM3r0aEVERGjjxo2mz8kyz8YKJt98842amppaPYg0OTlZn376aYBmZS0ul0szZ85Udna2Bg4cKEmqqKhQTEyMEhMTPcYmJyeroqIiALMMTStWrNDWrVtVWlra6jWOsXm++OILLVy4ULNmzdJDDz2k0tJS3XnnnYqJidGUKVPcx7Ot8wjH2nsPPvignE6n+vfvr8jISDU1NWnu3LmaNGmSJHGcfcCbY1pRUaEePXp4vB4VFaWkpCSfHHfCDkLS9OnTtWvXLq1bty7QU7GUAwcO6K677tK7776ruLi4QE/H0lwul4YNG6Z58+ZJkgYPHqxdu3bp2Wef1ZQpUwI8O+t45ZVXtGzZMi1fvlwXXnihtm/frpkzZyotLY3jHEa4jOUD3bt3V2RkZKsVKpWVlUpJSQnQrKxjxowZevPNN7V27Vr17NnTvT0lJUWNjY2qqanxGM9x996WLVtUVVWlIUOGKCoqSlFRUXrvvff01FNPKSoqSsnJyRxjk6SmpiozM9Nj24ABA7R//35Jch9PziNn57777tODDz6oiRMnatCgQbrpppt09913q7CwUBLH2Re8OaYpKSmqqqryeP348eOqrq72yXEn7PhATEyMhg4dqtWrV7u3uVwurV69WllZWQGcWWgzDEMzZszQypUrtWbNGjkcDo/Xhw4dqujoaI/jvnv3bu3fv5/j7qVrrrlGO3fu1Pbt290fw4YN06RJk9z/n2Nsjuzs7FbVCZ999pl69+4tSXI4HEpJSfE41k6nUxs3buRYd8CRI0cUEeH5oy4yMlIul0sSx9kXvDmmWVlZqqmp0ZYtW9xj1qxZI5fLpZEjR5o/KdNveYZhGM1Lz2NjY40XXnjBKCsrM37xi18YiYmJRkVFRaCnFrLy8vIMu91u/POf/zQOHjzo/jhy5Ih7zG233Wb06tXLWLNmjbF582YjKyvLyMrKCuCsQ9+Jq7EMg2Nslk2bNhlRUVHG3LlzjT179hjLli0zOnfubCxdutQ9Zv78+UZiYqLxt7/9zfjoo4+MH/3oRyyJ7qApU6YY5557rnvp+WuvvWZ0797duP/++91jOM4dV1dXZ2zbts3Ytm2bIcl44oknjG3bthn79u0zDMO7Yzpu3Dhj8ODBxsaNG41169YZ/fr1Y+l5KPrDH/5g9OrVy4iJiTFGjBhhbNiwIdBTCmmS2vwoKipyjzl69Khx++23G9/73veMzp07GzfccINx8ODBwE3aAk4OOxxj8xQXFxsDBw40YmNjjf79+xuLFi3yeN3lchlz5swxkpOTjdjYWOOaa64xdu/eHaDZhian02ncddddRq9evYy4uDjjvPPOMx5++GGjoaHBPYbj3HFr165t83w8ZcoUwzC8O6bffvut8bOf/czo2rWrkZCQYOTm5hp1dXU+ma/NME6okQQAALAY7tkBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBAACWRtgBELQqKip01113qW/fvoqLi1NycrKys7O1cOFCHTlyRJKUkZEhm80mm82mLl26aMiQIXr11VdbvdbWx9SpU0/5/devX6+oqChdcsklPv6TAvClqEBPAADa8sUXXyg7O1uJiYmaN2+eBg0apNjYWO3cuVOLFi3Sueeeqx/+8IeSpF/96leaNm2anE6n/ud//kc//elPde6556q0tFRNTU2SpA8//FATJkzQ7t27lZCQIEnq1KlTu9+/pqZGN998s6655hpVVlb6/g8MwGcIOwCC0u23366oqCht3rxZXbp0cW8/77zz9KMf/UgnPtYvPj5eKSkpSklJ0TPPPKOlS5equLhYhYWF7jFJSUmSpB49eigxMfG03/+2227Tz3/+c0VGRur111837c8FwP+4jAUg6Hz77bd65513NH36dI+gcyKbzdbm9qioKEVHR6uxsfGMv39RUZG++OIL5efnn/E+AAQPwg6AoPP555/LMAxdcMEFHtu7d++url27qmvXrnrggQdafV1jY6MKCwtVW1urq6+++oy+9549e/Tggw9q6dKliorizW/ACgg7AELGpk2btH37dl144YVqaGhwb3/ggQfUtWtXde7cWY8//rjmz5+v8ePHn3Z/LcGpa9euuu2229TU1KSf//znKigo0Pnnn+/LPwoAP+LXFgBBp2/fvrLZbNq9e7fH9vPOO09S6xuL77vvPk2dOlVdu3ZVcnJyu5e4TrZ9+3b3/09ISFBdXZ02b96sbdu2acaMGZIkl8slwzAUFRWld95554zfMQIQOIQdAEGnW7duGjNmjJ5++mndcccd7d6306J79+7q27dvh7/PyV/jcrm0c+dOj20LFizQmjVr9Je//EUOh6PD3wNA4BF2AASlBQsWKDs7W8OGDdNjjz2miy66SBERESotLdWnn36qoUOHmv49IyIiNHDgQI9tPXr0UFxcXKvtAEIHYQdAUOrTp4+2bdumefPmafbs2fryyy8VGxurzMxM3Xvvvbr99tsDPUUAIcJmnFhWAQAAYDGsxgIAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJZG2AEAAJb2/wB2LOXNIVuDmAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n",
    "list1, list2 = gpt4_model_1, llama3_8b_model_1\n",
    "\n",
    "plt.scatter(list1, list2)\n",
    "plt.plot(\n",
    "    np.unique(list1),\n",
    "    np.poly1d(np.polyfit(list1, list2, 1))(np.unique(list1))\n",
    ")\n",
    "plt.xlabel(\"GPT-4\")\n",
    "plt.ylabel(\"Llama3 8B\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e28b2d2-7f31-4c5f-853b-1e71dc715a25",
   "metadata": {},
   "source": [
    "### Correlation Coefficients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "44ef7e9a-1f07-4e94-bdc5-d5271616ef6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pearson</th>\n",
       "      <th>Spearman</th>\n",
       "      <th>Kendall Tau</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Results</th>\n",
       "      <td>0.80489</td>\n",
       "      <td>0.698406</td>\n",
       "      <td>0.57292</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Pearson  Spearman  Kendall Tau\n",
       "Results  0.80489  0.698406      0.57292"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from scipy.stats import spearmanr, kendalltau\n",
    "\n",
    "pearson_correlation = np.corrcoef(list1, list2)[0, 1]\n",
    "spearman_correlation, _ = spearmanr(list1, list2)\n",
    "kendall_tau_correlation, _ = kendalltau(list1, list2)\n",
    "\n",
    "correlation_table = pd.DataFrame({\n",
    "    \"Pearson\": [pearson_correlation],\n",
    "    \"Spearman\": [spearman_correlation],\n",
    "    \"Kendall Tau\": [kendall_tau_correlation]\n",
    "}, index=['Results'])\n",
    "\n",
    "correlation_table"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a1bd708-ba5d-4290-abe3-ee736059c2cd",
   "metadata": {},
   "source": [
    "- For comparison, below are the correlation coefficients from the Prometheus 2 paper by Kim et al. 2024 ([https://arxiv.org/abs/2405.01535](https://arxiv.org/abs/2405.01535)), which are all in the same ballpark as the ones reported for Llama 3 above\n",
    "- Note that Prometheus 2 is a model specifically finetuned for LLM rating and evaluation "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbc033f4-8a11-42be-a683-6cef7eb23468",
   "metadata": {},
   "source": [
    "#### Pearson\n",
    "\n",
    "| Evaluator LM          | VICUNA Bench | VICUNA Bench  | MT Bench   | MT Bench      | FLASK      | FLASK         | FLASK     | Feedback Bench |\n",
    "|-----------------------|--------------|---------------|------------|---------------|------------|---------------|-----------|----------------|\n",
    "|                       | GPT-4-1106   | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | Humans    | GPT-4-0613     |\n",
    "| LLAMA2-CHAT 7B        | 0.205        | 0.243         | 0.036      | 0.055         | 0.317      | 0.256         | 0.299     | 0.523          |\n",
    "| LLAMA2-CHAT 13B       | 0.185        | 0.141         | -0.042     | -0.002        | 0.239      | 0.247         | 0.263     | 0.545          |\n",
    "| LLAMA2-CHAT 70B       | 0.350        | 0.463         | 0.178      | 0.228         | 0.388      | 0.402         | 0.317     | 0.592          |\n",
    "| MISTRAL-INSTRUCT-7B   | 0.486        | 0.561         | 0.284      | 0.396         | 0.448      | 0.437         | 0.377     | 0.586          |\n",
    "| MIXTRAL-INSTRUCT-8X7B | 0.566        | 0.579         | 0.551      | 0.539         | 0.483      | 0.495         | 0.420     | 0.673          |\n",
    "| **PROMETHEUS-7B**     | **0.484**    | **0.528**     | **0.378**  | **0.382**     | **0.352**  | **0.331**     | **0.348** | **0.847**      |\n",
    "| **PROMETHEUS-13B**    | **0.492**    | **0.534**     | **0.404**  | **0.477**     | **0.462**  | **0.470**     | **0.449** | **0.860**      |\n",
    "| AUTO-J (13B)          | 0.351        | 0.262         | 0.432      | 0.375         | 0.430      | 0.370         | 0.473     | 0.637          |\n",
    "| **PROMETHEUS-2-7B**   | **0.642**    | **0.610**     | **0.543**  | **0.554**     | **0.645**  | **0.578**     | **0.544** | **0.878**      |\n",
    "| **PROMETHEUS-2-8X7B** | **0.685**    | **0.635**     | **0.665**  | **0.614**     | **0.659**  | **0.626**     | **0.555** | **0.898**      |\n",
    "| GPT-3.5-TURBO-0613    | 0.335        | 0.349         | 0.183      | 0.194         | 0.437      | 0.396         | 0.450     | 0.594          |\n",
    "| GPT-4-1106            | /            | 0.694         | /          | 0.717         | /          | 0.736         | 0.679     | 0.753          |\n",
    "| CLAUDE-3-OPUS         | 0.694        | /             | 0.717      | /             | 0.736      | /             | 0.573     | 0.788          |\n",
    "\n",
    "#### Spearman\n",
    "\n",
    "| Evaluator LM          | VICUNA Bench | VICUNA Bench  | MT Bench   | MT Bench      | MT Bench   | FLASK         | FLASK     | Feedback Bench |\n",
    "|-----------------------|--------------|---------------|------------|---------------|------------|---------------|-----------|----------------|\n",
    "|                       | GPT-4-1106   | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | Humans    | GPT-4-0613     |\n",
    "| LLAMA2-CHAT 7B        | 0.236        | 0.255         | 0.084      | 0.089         | 0.301      | 0.244         | 0.279     | 0.511          |\n",
    "| LLAMA2-CHAT 13B       | 0.178        | 0.179         | -0.025     | 0.044         | 0.206      | 0.222         | 0.224     | 0.543          |\n",
    "| LLAMA2-CHAT 70B       | 0.348        | 0.466         | 0.197      | 0.252         | 0.391      | 0.389         | 0.298     | 0.585          |\n",
    "| MISTRAL-INSTRUCT-7B   | 0.389        | 0.480         | 0.266      | 0.358         | 0.499      | 0.478         | 0.374     | 0.563          |\n",
    "| MIXTRAL-INSTRUCT-8X7B | 0.476        | 0.556         | 0.545      | 0.517         | 0.505      | 0.500         | 0.386     | 0.659          |\n",
    "| **PROMETHEUS-7B**     | **0.508**    | **0.528**     | **0.385**  | **0.349**     | **0.367**  | **0.326**     | **0.317** | **0.876**      |\n",
    "| **PROMETHEUS-13B**    | **0.492**    | **0.534**     | **0.401**  | **0.470**     | **0.474**  | **0.454**     | **0.398** | **0.893**      |\n",
    "| AUTO-J (13B)          | 0.337        | 0.297         | 0.408      | 0.365         | 0.402      | 0.358         | 0.408     | 0.623          |\n",
    "| **PROMETHEUS-2-7B**   | **0.643**    | **0.584**     | **0.550**  | **0.524**     | **0.626**  | **0.569**     | **0.490** | **0.909**      |\n",
    "| **PROMETHEUS-2-8X7B** | **0.660**    | **0.615**     | **0.669**  | **0.605**     | **0.642**  | **0.618**     | **0.496** | **0.912**      |\n",
    "| GPT-3.5-TURBO-0613    | 0.319        | 0.354         | 0.192      | 0.198         | 0.446      | 0.390         | 0.374     | 0.565          |\n",
    "| GPT-4-1106            | /            | 0.659         | /          | 0.721         | /          | 0.729         | 0.650     | 0.753          |\n",
    "| CLAUDE-3-OPUS         | 0.659        | /             | 0.721      | /             | 0.729      | /             | 0.567     | 0.784          |\n",
    "\n",
    "#### Kendall-Tau\n",
    "\n",
    "| Evaluator LM          | VICUNA Bench | VICUNA Bench  | MT Bench   | MT Bench      | FLASK      | FLASK         | FLASK     | Feedback Bench |\n",
    "|-----------------------|--------------|---------------|------------|---------------|------------|---------------|-----------|----------------|\n",
    "|                       | GPT-4-1106   | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | GPT-4-1106 | Claude-3-Opus | Humans    | GPT-4-0613     |\n",
    "| LLAMA2-CHAT 7B        | 0.183        | 0.203         | 0.065      | 0.070         | 0.229      | 0.186         | 0.211     | 0.419          |\n",
    "| LLAMA2-CHAT 13B       | 0.145        | 0.146         | -0.019     | 0.037         | 0.160      | 0.174         | 0.174     | 0.453          |\n",
    "| LLAMA2-CHAT 70B       | 0.282        | 0.382         | 0.150      | 0.196         | 0.310      | 0.310         | 0.221     | 0.487          |\n",
    "| MISTRAL-INSTRUCT-7B   | 0.314        | 0.391         | 0.208      | 0.281         | 0.395      | 0.384         | 0.287     | 0.454          |\n",
    "| MIXTRAL-INSTRUCT-8X7B | 0.395        | 0.468         | 0.433      | 0.419         | 0.410      | 0.408         | 0.304     | 0.551          |\n",
    "| **PROMETHEUS-7B**     | **0.405**    | **0.425**     | **0.290**  | **0.263**     | **0.282**  | **0.251**     | **0.236** | **0.770**      |\n",
    "| **PROMETHEUS-13B**    | **0.397**    | **0.434**     | **0.299**  | **0.352**     | **0.365**  | **0.352**     | **0.299** | **0.793**      |\n",
    "| AUTO-J (13B)          | 0.282        | 0.242         | 0.303      | 0.272         | 0.312      | 0.282         | 0.312     | 0.515          |\n",
    "| **PROMETHEUS-2-7B**   | **0.515**    | **0.478**     | **0.458**  | **0.421**     | **0.500**  | **0.454**     | **0.376** | **0.773**      |\n",
    "| **PROMETHEUS-2-8X7B** | **0.559**    | **0.515**     | **0.535**  | **0.483**     | **0.526**  | **0.507**     | **0.388** | **0.800**      |\n",
    "| GPT-3.5-TURBO-0613    | 0.255        | 0.287         | 0.148      | 0.157         | 0.360      | 0.315         | 0.298     | 0.489          |\n",
    "| GPT-4-1106            | /            | 0.553         | /          | 0.590         | /          | 0.609         | 0.517     | 0.662          |\n",
    "| CLAUDE-3-OPUS         | 0.553        | /             | 0.590      | /             | 0.609      | /             | 0.453     | 0.693          |"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
